Evolution of the number of recordings for a composer

In [1]:
from datetime import datetime
%run startup.ipy
Last notebook update: 2018-06-06
Git repo: git@bitbucket.org:loujine/musicbrainz-dataviz.git
Importing libs
Defining database parameters

Defining *sql* helper function
Last database update: 2018-06-02

Python packages versions:
numpy       1.14.3
pandas      0.23.0
sqlalchemy  1.2.8
CPython 3.7.0b5
IPython 6.4.0
In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)
In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

1

sql(""" SELECT a.name, COUNT(*) AS cnt -- , w.name FROM work AS w JOIN l_artist_work AS law ON w.id = law.entity1 JOIN artist AS a ON a.id = law.entity0 JOIN link AS l ON l.id = law.link WHERE l.link_type = 168 AND a.area=98 GROUP BY a.name ORDER BY cnt DESC ; """)

2

In [4]:
# lib/MusicBrainz/Server/Constants.pm
EDIT_MEDIUM_CREATE = 51
EDIT_RECORDING_CREATE = 71
EDIT_RECORDING_EDIT = 72
EDIT_RECORDING_DELETE = 73
EDIT_RECORDING_MERGE = 74

def current_recording_count(artist_name):
    return sql("""
SELECT COUNT(*) AS cnt
  FROM artist             AS a
  JOIN artist_credit_name AS acn ON a.id = acn.artist
  JOIN artist_credit      AS ac  ON ac.id = acn.artist_credit
  JOIN recording          AS r   ON r.artist_credit = ac.id
 WHERE a.name = %(artist_name)s
""", artist_name=artist_name).cnt[0]
    
def edits_from_new_medium(artist_name):
    artist_id = sql("SELECT id FROM artist WHERE name=%(artist_name)s", 
                    artist_name=artist_name).id[0]
    df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
       edit.id                           AS new,
       edit_data.data->'tracklist'       AS tracklist,
       editor.name                       AS editor
  FROM artist
  JOIN edit_artist ON artist.id = edit_artist.artist
  JOIN edit        ON edit.id = edit_artist.edit
  JOIN edit_data   ON edit_data.edit = edit_artist.edit
  JOIN editor      ON editor.id = edit.editor
 WHERE artist.name = %(artist_name)s
   AND edit.type = %(EDIT_TYPE)s
    ;""", artist_name=artist_name, EDIT_TYPE=EDIT_MEDIUM_CREATE)
    df.index = df['date'].apply(lambda d: d.date())
    del df['date']
    df['new'] = df.tracklist.apply(lambda tl: [t['artist_credit']['names'][0]['artist']['id'] 
                                               for t in tl].count(artist_id))
    del df['tracklist']
    return df

def edits_from_standalone_recordings(artist_name):
    df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
       edit.id                           AS standalone,
       editor.name                       AS editor
  FROM artist
  JOIN edit_artist ON artist.id = edit_artist.artist
  JOIN edit        ON edit.id = edit_artist.edit
  JOIN edit_data   ON edit_data.edit = edit_artist.edit
  JOIN editor      ON editor.id = edit.editor
 WHERE artist.name = %(artist_name)s
   AND edit.type = %(EDIT_TYPE)s
    ;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_CREATE)
    df.index = df['date'].apply(lambda d: d.date())
    del df['date']
    return df

def edits_from_recordings_changed_from(artist_name):
    df = sql("""    
SELECT date_trunc('day', edit.open_time) AS date,
       edit.id                           AS changed_from,
       CASE WHEN edit_note.text ILIKE E'%%\r\nGM script:%%' THEN 1
            ELSE 0
       END                               AS userscript,
       editor.name                       AS editor
  FROM artist
  JOIN edit_artist ON artist.id = edit_artist.artist
  JOIN edit        ON edit.id = edit_artist.edit
  JOIN edit_data   ON edit_data.edit = edit_artist.edit
  JOIN editor      ON editor.id = edit.editor
  JOIN edit_note   ON edit_note.edit = edit.id
 WHERE artist.name = %(artist_name)s
   AND edit.type = %(EDIT_TYPE)s
   AND artist.id = CAST(edit_data.data->'old'->'artist_credit'->'names'->0->'artist'->>'id' AS integer)
    ;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_EDIT)
    df.index = df['date'].apply(lambda d: d.date())
    del df['date']
    return df

def edits_from_recordings_changed_to(artist_name):
    df = sql("""    
SELECT date_trunc('day', edit.open_time) AS date,
       edit.id                           AS changed_to,
       editor.name                       AS editor
  FROM artist
  JOIN edit_artist ON artist.id = edit_artist.artist
  JOIN edit        ON edit.id = edit_artist.edit
  JOIN edit_data   ON edit_data.edit = edit_artist.edit
  JOIN editor      ON editor.id = edit.editor
 WHERE artist.name = %(artist_name)s
   AND edit.type = %(EDIT_TYPE)s
   AND artist.id = CAST(edit_data.data->'new'->'artist_credit'->'names'->0->'artist'->>'id' AS integer)
    ;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_EDIT)
    df.index = df['date'].apply(lambda d: d.date())
    del df['date']
    return df

def edits_from_recordings_merge(artist_name):
    df = sql("""    
SELECT date_trunc('day', edit.open_time) AS date,
       edit.id                           AS merged,
       editor.name                       AS editor
  FROM artist
  JOIN edit_artist ON artist.id = edit_artist.artist
  JOIN edit        ON edit.id = edit_artist.edit
  JOIN edit_data   ON edit_data.edit = edit_artist.edit
  JOIN editor      ON editor.id = edit.editor
 WHERE artist.name = %(artist_name)s
   AND edit.type = %(EDIT_TYPE)s
    ;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_MERGE)
    df.index = df['date'].apply(lambda d: d.date())
    del df['date']
    return df

sql(""" SELECT a.name, COUNT(*) AS cnt -- , w.name FROM work AS w JOIN l_artist_work AS law ON w.id = law.entity1 JOIN artist AS a ON a.id = law.entity0 JOIN link AS l ON l.id = law.link WHERE l.link_type = 168 AND a.begin_date_year < 1700 -- AND a.area=98 GROUP BY a.name ORDER BY cnt DESC LIMIT 10 ; """)

In [5]:
def recording_evolution(artist_name, editor='loujin'):
    current = current_recording_count(artist_name)
    df_new = edits_from_new_medium(artist_name)
    df_standalone = edits_from_standalone_recordings(artist_name)
    df_changed_to = edits_from_recordings_changed_to(artist_name)
    df_changed_from = edits_from_recordings_changed_from(artist_name)
    df_merged = edits_from_recordings_merge(artist_name)
    df = pd.concat([df_new.new.groupby('date').count(),
                    df_standalone.standalone.groupby('date').count(),
                    df_changed_to.changed_to.groupby('date').count(),
                    - df_changed_from.changed_from.groupby('date').count(),
                    - df_merged.merged.groupby('date').count(),
                   ], axis=1, sort=True).fillna(0)
    df['total'] = (df.new + df.standalone + df.changed_to + df.changed_from + df.merged).cumsum()
    df['total'] += current - df['total'][-1]
    df['userscript'] = - df_changed_from.userscript.groupby('date').count()
    df['userscript'] = df['userscript'].fillna(0)
    df['total_userscript'] = df.userscript.cumsum()
    df['total_userscript'] += current - df['total_userscript'][-1]

    df_editor = pd.concat([df_new.new[df_new.editor == editor].groupby('date').count(),
                           df_standalone.standalone[df_standalone.editor == editor].groupby('date').count(),
                           df_changed_to.changed_to[df_changed_to.editor == editor].groupby('date').count(),
                           df_changed_from.changed_from[df_changed_from.editor == editor].groupby('date').count(),
                           df_merged.merged[df_merged.editor == editor].groupby('date').count(),
                          ], axis=1, sort=True).fillna(0)
    df[editor] = (df_editor.new + df_editor.standalone + df_editor.changed_to 
                  - df_editor.changed_from - df_editor.merged)
    df['total_' + editor] = df[editor].fillna(0).cumsum()
    df['total_' + editor] += current - df['total_' + editor][-1]
    
    iplot({
        'data': [{
            'x': df.index, 
            'y': df[col],
            'name': col,
        } for col in df.columns],
        #'data': [{'x': [1,2], 'y': [3,4]}],
        'layout': {'title': artist_name}
    }, show_link=False)
    #fig = plt.figure()
    #df.total.plot(label='total')
    #df.userscript.plot(style='ks', label='userscript')
    #df[editor].plot(style='ko', label='my contribution')
    #fig.legend()
    #fig.suptitle(artist_name)
    return df
In [6]:
leifs = recording_evolution('Jón Leifs')
In [7]:
webern = recording_evolution('Anton Webern')
In [8]:
ravel = recording_evolution('Maurice Ravel')
In [9]:
debussy = recording_evolution('Claude Debussy')
In [10]:
chopin = recording_evolution('Fryderyk Chopin')
In [11]:
bach = recording_evolution('Johann Sebastian Bach')
In [12]:
brahms = recording_evolution('Johannes Brahms')
In [13]:
beethoven = recording_evolution('Ludwig van Beethoven')
In [14]:
schubert = recording_evolution('Franz Schubert')
In [15]:
dvorak = recording_evolution('Antonín Dvořák')